library(survey)

# Import Data -------------------------------------------------------------
#setwd("")
source("01_cleaning_raceIAT2019.R")
source("01_cleaning_raceIAT2020.R")


# Create Targets ----------------------------------------------------------
## Picking a week to establish as baseline 
WEEKS <- unique(impdat20_bw$week)
set.seed(1693)
weight_week <- sample(WEEKS[1:16], 1) # any week between january and may
weight_week 
# note: seed changes across R versions so caution
weight_week_dat <- subset(impdat20_bw, week == weight_week  & 
                            !is.na(sex)  & !is.na(race5) & !is.na(edu_cat4) & !is.na(age_cat4) & 
                            !is.na(census_region) & !is.na(broughtwebsite2) & 
                            !is.na(ideo3))

base_week <- svydesign(~1, data = weight_week_dat)

# FUNCTION FOR CREATING TARGETS FROM AUXILIARY INFORMATION AND FORMULA
# From Caughey et al 2020
create_targets <- function (target_design, target_formula) {
  target_mf <- model.frame(target_formula, model.frame(target_design))
  target_mm <- model.matrix(target_formula, target_mf)
  wts <- weights(target_design)
  colSums(target_mm * wts) / sum(wts) # returns vector of targets
}

# Note Targets
target_formula <- ~ (race3 + ideo3_lab + sex)^2 + 
  (race3 + ideo3_lab + edu_cat4)^3 + 
  (race3 + ideo3_lab + age_cat4)^3 + 
  (race3 + ideo3_lab + census_region)^2 + 
  (race3 + ideo3_lab + broughtwebsite2)^2
pop_targets <- create_targets(base_week, target_formula)
pop_targets

# Creating Weights by Week ------------------------------------------------
impdat1920.weights <- array(NA, c(1, 2))
colnames(impdat1920.weights) <- c("session_id", "WeekWeights")

WEEKS <- sort(unique(impdat19_bw$week))

for(i in 1:length(WEEKS)){
  print(i/length(WEEKS))
  dat <- impdat19_bw[which(impdat19_bw$week == WEEKS[i]),]
  dat <- subset(dat, !is.na(sex) & !is.na(race5) & !is.na(edu_cat4) & !is.na(age_cat4) & !is.na(census_region)
                & !is.na(broughtwebsite2) & !is.na(ideo3))
  
  print(nrow(dat))
  
  if(nrow(dat) > 500){
    impdat19.unw <- svydesign(ids = ~1, data = dat)
    x <- array(NA, c(nrow(dat), 2))
    x[,1] <- dat$session_id
    
    d_weighted <- calibrate(design = impdat19.unw,
                            formula = target_formula,
                            population = pop_targets,
                            calfun = "raking",
                            maxit = 2000, 
                            epsilon = 0.0019)
    
    ## Verify targets (uncomment for weeks if wanted)
    ### means
    # unadjusted
    # svymean(~sex + edu_cat4 + race3 + age_cat4 + census_region + broughtwebsite2 + ideo3_lab, impdat19.unw)
    # # target
    # svymean(~sex + edu_cat4 + race3 + age_cat4 + census_region + broughtwebsite2 + ideo3_lab, base_week)
    # # rake
    # svymean(~sex + edu_cat4 + race3 + age_cat4 + census_region + broughtwebsite2 + ideo3_lab, d_weighted)
    # # unadjusted
    # svyby(~edu_cat4, ~ideo3_lab, impdat19.unw, svymean)
    # # target
    # svyby(~edu_cat4, ~ideo3_lab, base_week, svymean)
    # # rake
    # svyby(~edu_cat4, ~ideo3_lab, d_weighted, svymean) 
    
    x[,2] <- weights(d_weighted)
  }
  
  if(nrow(dat) < 500){
    x[,2] <- NA
  }
  
  impdat1920.weights <- rbind(impdat1920.weights,x)
}

impdat1920.WeekWeight <- merge(impdat19_bw, impdat1920.weights, by = "session_id", all.x = T)
rm(impdat1920.weights, impdat19.unw)

# summary(impdat19.weighted$WeekWeights)
# hist(impdat19.weighted$WeekWeights)
# sd(impdat19.weighted $WeekWeights, na.rm = T)

# checking selected week is all 1
# summary(impdat19.weighted$WeekWeights[which(impdat19.weighted$week == "04")])

save(impdat1920.WeekWeight, file = "./data/impdat1920.Race.WeekWeight.rdata")

